library(here)
library(tidyverse)
library(plotly)
library(scales)
library(lubridate)in_file = here::here("data", "fecundity", "20210205_quantitative.xlsx")
df_quant = readxl::read_xlsx(in_file)# File to save to
out_file = here::here("data", "fecundity", "20210208_quant.csv")
# Remove NAs
df_quant = df_quant %>%
filter(!is.na(Strain))
# Split into list
quant_list = list("F15" = df_quant[, 1:8],
"F16" = df_quant[, c(1, 9:length(df_quant))])
# Pivot longer
out_df = lapply(quant_list, function(generation){
out = generation %>%
# Pivot longer
tidyr::pivot_longer(cols = contains("eggs"),
names_to = "DATE",
names_prefix = "eggs ",
values_to = "EGG_COUNT") %>%
# Convert into date
dplyr::mutate(DATE = DATE %>%
str_c("2020") %>%
str_replace_all("\\.", "-") %>%
as.Date("%d-%m-%Y")) %>%
# Get weekday
dplyr::mutate(WEEKDAY = weekdays(DATE)) %>%
# Rename columns
dplyr::select(STRAIN = "Strain",
FEMALE_COUNT = contains("females"),
DATE, WEEKDAY, EGG_COUNT,
everything()) %>%
# Replace question marks in FEMALE_COUNT and convert to integer
dplyr::mutate(FEMALE_COUNT = str_replace(FEMALE_COUNT, "\\?", "") %>%
as.integer()) %>%
# Get eggs per female
dplyr::mutate(EGGS_PER_FEMALE = EGG_COUNT / FEMALE_COUNT)
return(out)
}) %>%
# bind in to DF
dplyr::bind_rows(.id = "GENERATION") %>%
# Write to CSV
readr::write_csv(out_file, na = "")
# Adapt variables for plotting
strain_levels = unique(out_df$STRAIN)
pal_primary = hue_pal()(length(strain_levels))
names(pal_primary) = strain_levels
out_df$STRAIN = factor(out_df$STRAIN, levels = strain_levels)
out_df$WEEKDAY = factor(out_df$WEEKDAY,
levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
knitr::kable(head(out_df))| GENERATION | STRAIN | FEMALE_COUNT | DATE | WEEKDAY | EGG_COUNT | EGGS_PER_FEMALE |
|---|---|---|---|---|---|---|
| F15 | 4-1 | NA | 2020-07-02 | Thursday | NA | NA |
| F15 | 4-1 | NA | 2020-07-03 | Friday | NA | NA |
| F15 | 4-1 | NA | 2020-07-21 | Tuesday | NA | NA |
| F15 | 4-1 | NA | 2020-07-23 | Thursday | NA | NA |
| F15 | 4-1 | NA | 2020-07-28 | Tuesday | NA | NA |
| F15 | 4-1 | NA | 2020-07-30 | Thursday | NA | NA |
corr_plot = out_df %>%
dplyr::group_by(GENERATION, STRAIN) %>%
summarise(mean(EGGS_PER_FEMALE)) %>%
tidyr::pivot_wider(names_from = GENERATION,
values_from = `mean(EGGS_PER_FEMALE)`) %>%
ggplot() +
geom_point(aes(F15, F16, colour = STRAIN)) +
coord_fixed() +
theme_bw() +
guides(colour = F) +
xlim(0,7) +
ylim(0,7) +
ggtitle("Mean eggs per female") +
scale_color_manual(values = pal_primary)## `summarise()` has grouped output by 'GENERATION'. You can override using the `.groups` argument.
# Plotly
ggplotly(corr_plot, height = 400, width = 400) %>%
layout(showlegend = F)collection_day_plot = out_df %>%
ggplot() +
geom_point(aes(DATE, EGGS_PER_FEMALE, colour = STRAIN), alpha = 0.8) +
facet_wrap(vars(GENERATION, WEEKDAY)) +
guides(colour = F) +
theme_bw() +
xlab("Date") +
ylab("Mean eggs produced per female") +
scale_color_manual(values = pal_primary)
ggplotly(collection_day_plot, height = 1000, width = 800) %>%
layout(showlegend = F)## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Get function to reverse labels in ggplotly (from https://stackoverflow.com/questions/59611914/reverse-the-legend-order-when-using-ggplotly)
reverse_legend_labels <- function(plotly_plot) {
n_labels <- length(plotly_plot$x$data)
plotly_plot$x$data[1:n_labels] <- plotly_plot$x$data[n_labels:1]
plotly_plot
}
violin_plot = out_df %>%
dplyr::mutate(STRAIN = factor(STRAIN, levels = rev(strain_levels))) %>%
ggplot() +
geom_violin(aes(STRAIN, EGGS_PER_FEMALE, fill = STRAIN, colour = STRAIN)) +
geom_jitter(aes(STRAIN, EGGS_PER_FEMALE, label = GENERATION), size = 0.25) +
theme_bw() +
coord_flip() +
guides(fill = F, colour = F) +
xlab("MIKK panel line") +
ylab("Mean eggs produced per female") +
scale_color_manual(values = pal_primary) +
scale_fill_manual(values = pal_primary)## Warning: Ignoring unknown aesthetics: label
violin_plot %>%
plotly::ggplotly(height = 1200, width = 700) %>%
reverse_legend_labels() %>%
layout(showlegend = F)## Warning: Removed 518 rows containing non-finite values (stat_ydensity).
in_file = here::here("data", "fecundity", "20210205_semiquantitative.xlsx")
df_semi = readxl::read_xlsx(in_file, range = "A1:C81")
out_file = here::here("data", "fecundity", "20210208_semiquant.csv")
# One sample is missing from this dataset. Which one?
df_quant$Strain[which(!df_quant$Strain %in% df_semi$Pair)][1] “91-1”
# Create recode vector
date_recode = c("Feb 2019", "Jul 2020")
names(date_recode) = c("2/19", "7/20")
recode_vec_1 = c(0, 1, 2, 3, 4, 5)
names(recode_vec_1) = c(0, "o", "x", "x/", "xx", "xxx")
recode_vec_2 = c("Not producing",
"Do not produce every day; <3 eggs when they do",
"Do not produce every day; <5 eggs when they do",
"0-3 eggs per day",
"0-5 eggs per day",
"5-10 eggs per day")
names(recode_vec_2) = c(0, 1, 2, 3, 4, 5)
recode_vec_3 = gsub("; ", ";\n", recode_vec_2)
# Tidy
semi_out = df_semi %>%
# pivot fecundity
tidyr::pivot_longer(cols = contains("fecundity"),
names_to = "DATE",
names_prefix = "fecundity ",
values_to = "FECUNDITY") %>%
# recode fecundity measures
dplyr::mutate(DATE = dplyr::recode(DATE, !!!date_recode),
FECUNDITY = dplyr::recode(FECUNDITY, "xx/" = "xx"),
FECUNDITY = dplyr::na_if(FECUNDITY, "do not prod. Yet"),
FECUNDITY = ifelse(is.na(FECUNDITY), 0, FECUNDITY),
FECUNDITY = dplyr::recode(FECUNDITY, !!!factor(recode_vec_1)),
KEY = dplyr::recode(FECUNDITY, !!!recode_vec_2)) %>%
# rename STRAIN
dplyr::rename(STRAIN = Pair) %>%
# factorise
dplyr::mutate(STRAIN = factor(STRAIN, levels = strain_levels)) %>%
# write to file
readr::write_csv(out_file, na = "")
knitr::kable(head(semi_out))| STRAIN | DATE | FECUNDITY | KEY |
|---|---|---|---|
| 4-1 | Jul 2020 | 2 | Do not produce every day; <5 eggs when they do |
| 4-1 | Feb 2019 | 2 | Do not produce every day; <5 eggs when they do |
| 4-2 | Jul 2020 | 2 | Do not produce every day; <5 eggs when they do |
| 4-2 | Feb 2019 | 4 | 0-5 eggs per day |
| 5-1 | Jul 2020 | 2 | Do not produce every day; <5 eggs when they do |
| 5-1 | Feb 2019 | 4 | 0-5 eggs per day |
semi_all = semi_out %>%
dplyr::mutate(STRAIN = factor(STRAIN, levels = rev(strain_levels)),
KEY = gsub("; ", ";\n", KEY)) %>%
ggplot() +
geom_col(aes(STRAIN, KEY, fill = STRAIN)) +
theme_bw() +
scale_fill_manual(values = pal_primary) +
facet_wrap(vars(DATE), ncol = 2) +
guides(fill = F) +
coord_flip() +
theme(axis.text.x = element_text(size = 4)) +
xlab("MIKK line") +
ylab("Fecundity")
ggplotly(semi_all, width = 1200, height = 1000) %>%
reverse_legend_labels() %>%
layout(showlegend = F)semi_corr = semi_out %>%
tidyr::pivot_wider(id_cols = STRAIN,
names_from = DATE,
values_from = FECUNDITY) %>%
ggplot() +
geom_jitter(aes(`Feb 2019`, `Jul 2020`, colour = STRAIN), alpha = 0.7) +
coord_fixed() +
theme_bw() +
guides(colour = F) +
ggtitle("Correlation in semi-quantitative measure") +
scale_color_manual(values = pal_primary)
# Plotly
ggplotly(semi_corr, height = 400, width = 400) %>%
layout(showlegend = F)semi_final = semi_out %>%
dplyr::mutate(STRAIN = factor(STRAIN, levels = rev(strain_levels)),
KEY = gsub("; ", ";\n", KEY)) %>%
dplyr::filter(DATE == "Jul 2020") %>%
ggplot() +
geom_col(aes(STRAIN, KEY, fill = STRAIN)) +
theme_bw() +
scale_fill_manual(values = pal_primary) +
guides(fill = F) +
coord_flip() +
theme(axis.text.x = element_text(size = 4)) +
xlab("MIKK panel line") +
ylab("Fecundity") +
ggtitle("Fecundity of the MIKK panel lines as of July 2020")
ggplotly(semi_final, width = 600, height = 1000, tooltip = c("STRAIN", "KEY")) %>%
reverse_legend_labels() %>%
layout(showlegend = F)Horizontal
final_hor = semi_out %>%
dplyr::mutate(KEY = gsub("; ", ";\n", KEY),
KEY = factor(KEY, levels = recode_vec_3)) %>%
dplyr::filter(DATE == "Jul 2020") %>%
ggplot() +
geom_col(aes(STRAIN, KEY, fill = STRAIN)) +
theme_bw() +
scale_fill_manual(values = pal_primary) +
guides(fill = F) +
theme(axis.text.x = element_text(size = 3.5,
angle = 45),
axis.text.y = element_text(size = 5)) +
xlab("MIKK panel line") +
ylab(NULL) +
ggtitle("Fecundity of the MIKK panel lines as of July 2020")
ggplotly(final_hor, width = 900, height = 300) %>%
layout(showlegend = F)